home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
TPUG - Toronto PET Users Group
/
TPUG Users Group CD
/
TPUG Users Group CD.iso
/
AMIGA
/
AMICUS
/
AMIBEST2.ADF
/
Best of AMICUS 2
/
Txt
/
txt.c
< prev
next >
Wrap
C/C++ Source or Header
|
1987-07-22
|
13KB
|
518 lines
/***************************************************************************/
/* txt Reformat to TEXT: cr to lf, tabs, wordwrap, strip 0x80, ctrl chrs */
/* Copyright (c) 1986, 1987 by Ray Lance */
/* Released to the public domain, freely redistributable "shareware", but */
/* distribution must be of this entire txt.arc file. 8/24/86 v.1 13:10 */
/***************************************************************************/
/* 0x08 = backspace handling added 10/03/86 13:30 */
/* Re[-p]aragraphing wordwrap option added 12/22/86 v.2 17:20 */
/* Don't re-paragraph item [-h]eaders option added 12/25/86 22:15 */
/* Aztec-ized (removed unsigned char dependency) 12/26/86 04:45 */
/* Don't remove extra [-f]ormfeeds option added 12/27/86 18:40 */
/* Insert [-r]eturn in output line-endings added 12/27/86 19:30 */
/* Options listing if ? or no parameters added 12/27/86 20:15 */
/* Don't [-b]ackspace made optional added 12/27/86 21:45 */
/* Turn [-s]paces into tabs added 12/28/86 03:30 */
/* Output to TMP (MSDOS) + outfile msg added 12/31/86 08:20 */
/* Default MSDOS line ending is cr/lf added 1/01/87 14:00 */
/* Remove [-d]oublespacing added 1/02/87 20:10 */
/* Don't EOF on ^Z (-z) added 1/05/87 22:00 */
/***************************************************************************/
/* ---- Compiler / platform configuration -------------------------------- */
#include <stdio.h>
#include <ctype.h>
/* When M68000 is defined the string routines are declared by hand instead
   of pulling in <string.h> (the original note guesses this means Lattice). */
#ifdef M68000 /* Lattice? */
/* #define L303 1 /* NOTE: must be defined for ver 3.03 */
#ifdef L303
extern Enable_Abort; /* ^C handling */
#endif
int strlen(char *);
char *strchr(char *, int);
char *strcpy(char *, char *);
char *strcat(char *, char *);
#else
#include <string.h>
#endif
/* fopen() modes and the meaning of `local`:
   with M_I86 defined, files are opened in binary mode and `local` makes the
   file-level objects static; otherwise plain "r"/"w" modes are used and
   `local` expands to nothing, leaving those objects with external linkage. */
#ifdef M_I86 /* MSC 4.00 options? */
#include <stdlib.h>
#define local static /* locally defined for MS CodeView debugger */
char rd[] = "rb"; /* & O_BINARY, 'cause default is translate */
char wr[] = "wb";
#else
/* NO, set for Lattice and Manx Aztec: */
#define local /* globally defined for Amiga MetaScope debugger */
char rd[] = "r"; /* & others default to no translate. */
char wr[] = "w";
/* NOTE(review): this tests MPU68000 while the block above tests M68000 --
   presumably set by different compilers; confirm against their manuals. */
#ifdef MPU68000
#define AMIGA 1
#endif
#endif
/* Path separator searched for when deriving the default output file name. */
#ifdef MSDOS
#define dirchar '\\'
#else
#define dirchar '/'
#endif
/* On AMIGA builds memset/memcpy are mapped onto setmem/movmem, whose
   argument order differs (see the reordered macro parameters). */
#ifdef AMIGA /* Aztec & Lattice 3.03 have non-Unix: */
#define memset(d, c, n) setmem(d, n, c)
#define memcpy(d, s, n) movmem(s, d, n)
#endif /* (while Lattice 3.10 has both versions) */
/* Fallback min/max; both evaluate one of their arguments twice. */
#ifndef min
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
#endif
/* Backward character search, defined near the end of this file. */
char *strvchr();
/* Input and output streams; both are opened in main(). */
local FILE *in, *out;
/* Output file name buffer.  It starts as the default prefix; main() appends
   the input's base name to it and also reuses it when prompting for a
   replacement output name. */
local char filename[64] = "ram:";
/* Two identically shaped buffers:
     outp.lin is the output line being assembled (see `line` below);
     inp is used by unget()/get() as a pushback buffer that fills downward
     from &inp.stp. */
local struct {
char nul; /* null stopper for backward search of */
char lin[256]; /* the OUTPUT LINE */
char stp; /* extra for forced chopping */
} inp, outp;
#define line outp.lin
/* Option table.  It starts as the list of permitted option letters;
   getopt() overwrites each letter in place with the number given for that
   option (0 if none was given) or -1 if the option was absent. */
local char opt[] = "ctbdfrwhpxsz"; /* Command Line options: if specified, */
/* Pristine copy of the option letters, used by main() to reset opt[] before
   parsing options a second time. */
local char kopt[] = "ctbdfrwhpxsz";
/* Indices into opt[], one per option letter, in the same order as above. */
#define ctl 0 /* control chars ( < 0x20 ): 0=discard, else + 0x80
except tab, which is treated separately: */
#define tab 1 /* tab columns per tab: 0 = 8 */
#define bsp 2 /* don't backspace */
#define dbl 3 /* delete doublespacing */
#define frm 4 /* don't remove extra formfeeds */
#define ret 5 /* insert return in output line-endings */
#define wrp 6 /* word wrap: 0 or no col # = 77 -- default 255 */
#define hdr 7 /* don't hard-paragraph item headers */
#define par 8 /* re-paragraph word wrap */
#define x80 9 /* don't strip 80 bits */
#define spc 10 /* turn multiple spaces into tabs every n columns */
#define end 11 /* don't EOF on ^Z */
/* Usage / help text.  main() prints these strings to stderr one per line
   and stops at the first empty string, so the final "" is the terminator.
   The MSDOS conditionals only swap the wording of the platform-specific
   lines. */
char *optlist[] = {
" USAGE: TXT [options] input_filename [options] [output_filename] [options]",
#ifndef MSDOS
" OUTPUT will go to RAM:input_filename if not specified",
#else
" OUTPUT will go to TMP\\input_filename if not specified",
#endif
" ",
" The following options must be lowercase and preceded by a hyphen each:",
" Where: ([n] = 0 if omitted)",
" -b don't delete prev char on backspace, leave bsp to -c option",
" -c[n] n=0: discard control chars (chars < 0x20); n>0: char + 0x80",
" -d un-doublespace (remove every other extra line ending)",
" -f don't remove extra formfeeds, leave them to the -c option",
" -h don't preserve format of item headers (text surrounded by",
" lines of at least 4 same characters, like \"----\"), but...",
" -p invoke re-paragraphing: i.e., the changing of line-endings to",
" spaces except when the 1st char of the next line isspace()",
#ifndef MSDOS
" -r[n] n=0: cr 'sted linefeed at endline; n>0: output both cr & lf",
#else
" -r[n] n=0: delete return from endline cr/lf; n>0: delete linefeed",
#endif
" -s[n] change multiple spaces into tabs to every n columns (0 = 8)",
" -t[n] change tabs into spaces to every n columns (0 = 8)",
" -w[n] wordwrap at n chars/line (n can be up to 254; 0 = 77)",
" -z don't end file on ^Z, let -c option handle the character",
" -x don't strip 0x80 bits before any of the above",
" ",
" You may enter option(s) now; then you will be asked for input filename:",
#ifndef MSDOS
" (or ^C to abort) (output to RAM:)",
#else
" (or ^C to abort) (output to TMP\\)",
#endif
""}; /* empty string marks the end of the list */
/* Shared conversion state, read and written by main() and the helpers. */
local int s = 0; /* cols in source line image */
local int m = 0; /* cols in output line image */
/* Pending blanks: get() hands them out one at a time once the pushback
   buffer is empty; main() and rm_end_spc() add to or reset the count. */
local int spaces = 0; /* spaces converted from tab, yet to output */
local int tabin, tabout; /* # cols in input & output tabs */
/* Pushback buffer cursor and limit, managed by get()/unget():
   characters between g and w are waiting to be re-read. */
local char *g; /* where to get from */
local char *w; /* end of get buffer */
/*
 * main -- parse the options, open the input and output files, then copy the
 * input to the output one character at a time, applying the selected
 * conversions (line endings, tabs, backspace, formfeeds, control characters,
 * word wrap and re-paragraphing).
 *
 * argc/argv: the usual command line; getopt() removes every "-x" argument,
 * leaving argv[1] as the input file name and argv[2], if present, as the
 * output file name.
 *
 * Ends by calling exit(0) after reporting the byte counts on stderr.
 */
main(argc,argv)
int argc;
char *argv[];
{
register char *p = line; /* assembling line */
register int c = 0, l = 0; /* char, last char */
int dsp = 0; /* kill next endline (doublespacing) */
int ff = 0; /* last char formfeed or not space */
int i = 0; /* count of identical non-data chars at begin line */
int j = 0; /* count of chars in incoming line */
int k = 0; /* identity of identical char */
int n = 0; /* general work storage */
int z = 0; /* # spaces to replace tab with */
char *endlin, pgh, pgf, *force(), *newline(), *rm_end_spc(); /* wordwrap */
char **o, *f; /* options display &c. */
char *b; /* backwards looking for space-to-tab output */
#ifdef L303
Enable_Abort = 1; /* Lattice ^C handling */
#endif
/* After this call each opt[] entry holds the number given with its option,
   0 if the option was given without a number, or -1 if it was absent. */
getopt(&argc, argv, opt); /* GET COMMAND LINE OPTIONS */
/* No file name, or "?": show the help text and read one line of input. */
if (argc < 2 || *argv[1] == '?')
{
for (o = optlist; **o; fprintf(stderr, "%s\n", *o), o++);
/* NOTE(review): gets() cannot limit the read to the 256 bytes of line. */
gets(f = line);
/* Drop leading blanks by shifting the text left one char at a time.
   NOTE(review): source and destination of this strcpy overlap. */
while (*f == ' ')
strcpy(f, f+1);
if (strlen(line))
{
argc = 2;
argv[1] = line;
strcpy(opt, kopt); /* restore the option letters before re-parsing */
getopt(&argc, argv, opt);
}
}
/* ---- Normalise the option values -------------------------------------- */
/* tab: given without a number -> 8; absent (-1) -> 0 via the ++;
   any other value is incremented and decremented back unchanged. */
if (opt[tab] == 0)
opt[tab] = 8; /* tab defaults to 8 spaces */
else if (++opt[tab])
opt[tab]--;
if (opt[spc] == 0)
opt[spc] = 8; /* as does spaces-to-tabs */
else if (++opt[spc])
opt[spc]--;
/* Column widths actually used: input tabs default to 8, output tabs
   default to the input width. */
if ((tabin = opt[tab]) == 0)
tabin = 8;
if ((tabout = opt[spc]) == 0)
tabout = tabin;
if (opt[wrp] == 0)
opt[wrp] = 77; /* & word wrap after col 77 */
/* x80 becomes the mask ANDed onto every input char: 127 unless -x was
   given with 0 or no number, in which case the 0 is decremented to -1. */
if (opt[x80]--)
opt[x80] = 127; /* strip 80 can only do that (or not) */
if (opt[par] != -1)
opt[par] = 0; /* re-paragraphing active = 0 */
pgf = pgh = opt[par];
/* The following flags move from "-1 = absent" to "0 = absent". */
opt[hdr]++; /* hard-headers active = 0 */
opt[bsp]++;
opt[dbl]++;
opt[frm]++;
#ifdef MSDOS
if (opt[ret] > 0)
opt[ret] = 1;
#else
opt[ret]++;
#endif
if (opt[ret] > 0 && opt[wrp] == -1)
opt[wrp]--;
/* endlin is assigned here but not read again in this function. */
endlin = line + opt[wrp];
/* Clears the line buffer, including any text typed at the prompt above. */
memset(line, 0, sizeof(line));
while (!(in = fopen(argv[1], rd))) /* OPEN INPUT FILE */
{
fprintf(stderr, "Can't open input %s! New file name: ", argv[1]);
/* NOTE(review): unbounded gets() into the 256-byte line buffer. */
gets(argv[1] = line);
}
/* No output name given: build one from the input's base name. */
if (argc < 3)
{
#ifdef MSDOS
if ((f = getenv("TMP")) || (f = getenv("ARCTEMP")) ) {
strcpy(filename, f);
strcat(filename, "\\");
}
#endif
/* Find the last dirchar, else the first ':', else use the whole name.
   NOTE(review): strvchr() walks backwards until it meets a NUL, so when
   argv[1] contains no dirchar this relies on a NUL byte lying just before
   the string -- confirm for argv strings supplied by the startup code. */
if (!(f = strvchr(argv[1] + strlen(argv[1]) - 1, dirchar)))
if (!(f = strchr(argv[1], ':')))
f = argv[1] - 1;
/* NOTE(review): strcat() is not bounded by the 64 bytes of filename. */
argv[2] = strcat(filename, ++f); /* DEFAULT OUTPUT TO RAM: */
}
while (!(out = fopen(argv[2], wr))) /* OPEN OUTPUT FILE */
{
fprintf(stderr, "Can't open output %s! New file name: ",argv[2]);
/* NOTE(review): unbounded gets() into the 64-byte filename buffer. */
gets(argv[2] = filename);
}
/* ---- Main conversion loop: one input character per iteration ---------- */
while ((c = get()) != EOF)
{
switch (c &= opt[x80])
{
case 26: /* MSDOS EOF (^Z) */
/* opt[end] is -1 (== EOF) unless -z was given. */
if (opt[end] != EOF)
goto other;
unget(EOF); /* make the next get() return EOF */
break;
case '\r': /* <RETURN> */
/* Swallow a '\n' that directly follows; push anything else back. */
if (((c = get()) & opt[x80]) != '\n')
unget(c);
/* falls through: a return is handled as a line ending */
case '\n': /* LF */
j = -1;
/* Peek at the first character of the next line. */
c = unget(get()) & opt[x80];
/* discard doublespacing... */
if (opt[dbl] && ( dsp =
(!dsp & (c == '\n' | c == '\r'))))
break;
p = rm_end_spc(p); /* kill trailing spaces */
s = spaces = 0;
if (!opt[hdr]) /* if hard-hdrs not killed, */
if (i > 3) { /* and line is 4 or more */
if (k == l) /* identical chars, */
{ k = 0; pgh=opt[par]; }
else { /* let it flip-flop */
/* The run of identical chars is preceded by other text on the same
   output line: re-queue the line ending and flush that text first. */
if (m > i) {
unget('\n');
p = force(p);
break;
}
k = l; /* re-paragraphing. */
pgf = pgh = 1;
}
}
/* newline goes out as either nl or space: */
if (isspace(c) || pgf || (p <= line))
p = newline(p);
else {unget(' '); j--;}
pgf = pgh;
break;
case '\b': /* BACKSPACE */
if (opt[bsp])
goto other;
/* Remove the previous character, if the line has any columns. */
if (m) {
p--;
m--;
s--;
}
break;
case '\f': /* FORMFEED */
/* Kept when -f was given or when the previous formfeed/non-space
   character was not itself a formfeed; otherwise dropped. */
if (opt[frm] || c != ff)
goto other;
break;
case '\t': /* TAB */
/* Columns to the next input tab stop, less the one column that the
   stored character itself will account for below. */
z = tabin - (s % tabin) - 1;
if (opt[tab]) {
spaces += z; /* get() will supply the remaining blanks */
c = ' ';
}
else {
s += z;
m += tabout - (m % tabout) - 1;
}
/* falls through: the tab (or its first blank) is stored like any char */
default:
/* The nested if/else below pairs the else with the inner if. */
other: if (c < ' ' && c != '\t')
if (opt[ctl] == 0) /* discard ctrls? */
break;
else if (opt[ctl] != -1)
c |= 128; /* no, shift hi */
*p++ = c; /* STORE CHAR IN LINE */
if (c != '\f') {
m++;
s++;
}
/* Spaces-to-tabs: on reaching an output tab stop, look back over the
   blanks just stored and replace a run of two or more with tabs. */
if (opt[spc] && !(m % tabout))
{
for (b = p; *--b == ' ';);
if ((z = p - ++b) > 1)
{
p = b;
n = z % tabout != 0;
z = tabout * (z/tabout+n); /* round the run up to whole tab widths */
m -= z;
s -= z;
spaces += z;
}
while (spaces >= tabout)
{
*p++ = '\t';
m += tabout;
s += tabout;
spaces -= tabout;
}
}
if (m >= (unsigned char)opt[wrp])
p = force(p); /* if past end */
break;
}
/* Every `break` above lands here: per-character bookkeeping. */
if (i == j && (i == 0 || c == l)
&& c > ' ' && !isalnum(c) )
i++; /* count: identical non-data chars at begin line */
else i = 0;
if (c == '\f' || !isspace(c))
ff = c;
l = c;
j++;
}
/* Flush a final line that had no line ending. */
if (p > line)
newline(p);
fprintf(stderr, "Input was %ld bytes\n", ftell(in));
fprintf(stderr, "Output is %ld bytes in \"%s\".\n", ftell(out), argv[2]);
/* Neither stream is closed explicitly before exit() is called. */
return(exit(0));
}
/*
 * force -- the output line has reached the wrap column: break it.
 *
 * p points just past the last character stored in `line`.
 *
 * When wrapping is enabled (opt[wrp] != -1) the break point is chosen as:
 *   1. the last whitespace character in the line;
 *   2. if that is more than 10 characters back, or there is none, the last
 *      punctuation character, provided it lies after the whitespace (or
 *      there was no whitespace);
 *   3. if neither exists, the end of the line itself.
 * Everything after the break point is pushed back with unget(), last
 * character first, so get() returns it in the original order, and the part
 * up to and including the break character is written by newline().
 *
 * When wrapping is disabled the assembled characters are written as they
 * are, with no line ending added.
 *
 * Returns `line`, i.e. the position where assembly of the next line starts.
 *
 * The locals g and w deliberately shadow the file-level pushback pointers
 * of the same names; unget() still works on the file-level ones.
 */
char *force(p) /* wrap at any space or punctuation */
char *p;
{
    register char *g, *w;
    char *newline();    /* defined further down; avoid an implicit int decl */

    w = g = p;
    if (opt[wrp] != -1) {
        /* The <ctype.h> tests need a non-negative value: the line may hold
           bytes >= 0x80, which are negative where plain char is signed. */
        for (; --g >= line && !isspace((unsigned char)*g););  /* find first space, */
        if (p - g > 10 || g < line) {
            while (--p >= line && !ispunct((unsigned char)*p)); /* and punctuation; */
            if (p > g || g < line)
                g = p;          /* pick punctuation if spc <-- 10 */
            if (g < line)
                g = w - 1;      /* or force the bloody thing if neither. */
        }
        for (++g; --w >= g; unget(*w));     /* save chars, */
        newline(g);                         /* make new line, */
    } else fwrite(line, p - line, 1, out);
    return(line);                           /* & return ptr. */
}
/*
 * newline -- finish the output line and write it to `out`.
 *
 * p points just past the last character stored in `line`.  Trailing
 * whitespace is removed first (rm_end_spc), then the line ending selected
 * by opt[ret] is appended:
 *   opt[ret] == 0       "\n"
 *   opt[ret] == 1       "\r"
 *   any other value     "\r\n"
 * The output column counter m is reset to 0.
 *
 * Returns `line`, i.e. the position where assembly of the next line starts.
 */
char *newline(p) /* put newline: */
char *p;
{
    char *rm_end_spc();     /* defined further down; without this the call
                               would be taken to return int, not a pointer */

    p = rm_end_spc(p);      /* delete trailing spaces */
    if (opt[ret])
        *p++ = '\r';
    if (opt[ret] != 1)
        *p++ = '\n';
    fwrite(line, p - line, 1, out);
    m = 0;                  /* no cols in line now */
    return(line);
}
/*
 * rm_end_spc -- strip trailing whitespace from the output line.
 *
 * p points just past the last character stored in `line`.  Scanning
 * backwards, every isspace() character is dropped.  While doing so:
 *   - each blank adds 1 and each tab adds `tabout` to the global `spaces`;
 *   - formfeeds are counted and written back directly after the last
 *     character that was kept, so they are never lost;
 *   - other whitespace characters are simply discarded.
 * Afterwards `spaces` is cleared if it is below 3 or if nothing but
 * whitespace was found in the line.
 *
 * Returns the new end-of-line position (just past the last kept character
 * and any restored formfeeds).
 */
char *rm_end_spc(p) /* remove redundant end (trailing) spaces */
char *p;
{
    register int n = 0;     /* formfeeds seen among the trailing whitespace */

    /* isspace() needs a non-negative value: the line may hold bytes >= 0x80,
       which are negative where plain char is signed. */
    for (; --p >= line && isspace((unsigned char)*p);) {
        switch(*p)
        {
        case ' ':
            spaces++;
            break;
        case '\t':
            spaces += tabout;
            break;
        case '\f':
            n++;
            /* falls through */
        default:
            break;
        }
    }
    if (spaces < 3 || p < line)
        spaces = 0;
    for (; n--; *++p = '\f');   /* replace formfeeds */
    return(++p);
}
/*
 * getopt -- collect and decode "-x[n]" command line options.
 *
 * argc  in/out: number of entries in argv; reduced by one for every
 *       argument that starts with '-'.
 * argv  in/out: argument vector; option arguments are removed by shifting
 *       the arguments that follow them down one slot.
 * opt   in/out: string of permitted option letters.  On return each letter
 *       has been replaced by the value for that option: the number that
 *       followed the letter (0 if none), or -1 if the option was not given.
 *       The first occurrence of an option wins.
 *
 * Returns the number of permitted options that were actually present.
 *
 * Option arguments are concatenated into a fixed 80-byte work buffer; an
 * argument that would not fit is reported on stderr and ignored, but it is
 * still removed from argv.
 */
int getopt(argc, argv, opt) /* GET COMMAND LINE OPTIONS */
int *argc;
char **argv;
char *opt;
{
    register int i, k, n, r = 0, nopt = strlen(opt);
    char options[80], *o = options;

    for (*o = 0, i = *argc; i--;)   /* collect all the options first: */
    {
        if (**argv == '-')          /* if this arg is an option, */
        {
            /* collect it, but never write past the end of options[] */
            if (strlen(options) + strlen(*argv) < sizeof(options))
                strcat(options, *argv);
            else
                fprintf(stderr, "Option %s ignored: too many options\n", *argv);
            /* Delete this arg: move the i remaining pointers down one slot.
               Done by hand because source and destination overlap. */
            for (k = 0; k < i; k++)
                argv[k] = argv[k + 1];
            --*argc;
        }
        else argv++;
    }
    while (nopt--)                  /* for each option permitted: */
    {
        n = -1;                     /* -1 = option not present */
        /* for each '-' collected; stop cleanly when strchr() finds no more */
        for (o = options; (o = strchr(o, '-')) != NULL;)
            if (*++o == *opt)       /* if option, */
            {
                n = atoi(++o);      /* subst # given ( or 0) */
                r++;
                break;
            }
        *opt++ = n;
    }
    return(r);                      /* return # options actually present. */
}
/*
 * strvchr -- search backwards for a character.
 *
 * s  points at the character where the search starts (normally the last
 *    character of a string); the scan moves towards lower addresses.
 * c  the character to look for.
 *
 * Returns a pointer to the first match met, or NULL once a NUL byte is
 * reached without a match.  The match test comes before the NUL test, so
 * searching for NUL itself succeeds.  The caller must make sure a NUL byte
 * (or a match) exists at or before s; nothing else stops the scan.
 */
char *strvchr(s, c)
register char *s;
register int c;
{
    for (;;) {
        if (*s == c)
            return(s);          /* FOUND char: return location. */
        if (*s == 0)
            return(NULL);       /* hit the stopper: not found */
        s--;
    }
}
/*
 * get -- fetch the next input character.
 *
 * Sources, in order of priority:
 *   1. characters waiting in the pushback buffer (between g and w);
 *   2. pending blanks counted in `spaces`, handed out one per call;
 *   3. the input file.
 *
 * Returns the character as an int.  Pushed-back characters are read
 * through a plain char pointer, so a stored -1 comes back as EOF where
 * char is signed.
 */
get() /* GET NEXT CHARACTER */
{
    if (g < w)
        return(*g++);           /* pushed-back character first */
    if (spaces) {
        spaces--;
        return(' ');            /* then one of the pending blanks */
    }
    return(fgetc(in));          /* otherwise read the file */
}
/*
 * unget -- push a character back so that the next get() returns it.
 *
 * The pushback buffer is inp: it fills downward from &inp.stp, so the most
 * recently pushed character is the first one read back.  When the buffer
 * has been fully drained (g >= w) both pointers are rewound to &inp.stp
 * before the character is stored.  No check is made for the buffer
 * becoming full.
 *
 * Returns the stored character, converted from char to int.
 */
unget(c) /* UNGET CHARACTER */
char c;
{
    if (g >= w) {               /* nothing pending: start over at the top */
        w = &inp.stp;
        g = w;
    }
    g--;
    *g = c;
    return((int) *g);
}